# Remove every object from the current environment before running the notes.
# NOTE(review): this only deletes objects; it presumably leaves attached
# packages and options() untouched, so restarting the R session is the more
# reliable way to get a clean slate -- confirm before relying on it.
rm(list = ls())
# Attach the tidyverse packages (ggplot2, dplyr, readr, ... per the startup
# message printed below).
library(tidyverse)
## ── Attaching packages ────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.3.2 ✓ purrr 0.3.4
## ✓ tibble 3.0.3 ✓ dplyr 1.0.2
## ✓ tidyr 1.1.2 ✓ stringr 1.4.0
## ✓ readr 1.3.1 ✓ forcats 0.5.0
## ── Conflicts ───────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
# Limit console printing to 1000 entries.
options(max.print = 1000)
# Read the survey records that every plot below is built from.
surveys_complete <- read_csv(file = "data/surveys_complete.csv")
## Parsed with column specification:
## cols(
## record_id = col_double(),
## month = col_double(),
## day = col_double(),
## year = col_double(),
## plot_id = col_double(),
## species_id = col_character(),
## sex = col_character(),
## hindfoot_length = col_double(),
## weight = col_double(),
## genus = col_character(),
## species = col_character(),
## taxa = col_character(),
## plot_type = col_character()
## )
Visualize data using ggplot2
template: ggplot(data = <DATA>, mapping = aes(<MAPPINGS>)) + <GEOM_FUNCTION>()
Use the ggplot() function to: bind the plot to a specific data frame (the data argument); define the aesthetic mapping (with aes()); and add geoms using the + operator. For two continuous variables, use geom_point().
# Scatterplot of two continuous variables: weight on x, hindfoot length on y.
ggplot(surveys_complete, aes(x = weight, y = hindfoot_length)) +
  geom_point()
or draw plot with…
# Keep the base plot (data + aesthetic mapping) in a variable so that layers
# can be added to it afterwards.
surveys_plot <- ggplot(
  surveys_complete,
  aes(x = weight, y = hindfoot_length)
)
# Layers are appended with `+`.
surveys_plot + geom_point()
CHALLENGE=SCATTERPLOTS
The hexbin package adds hexagonal binning to ggplot2 (geom_hex()): each hexagon is coloured according to the number of observations that fall within its boundaries.
# hexbin is needed for the hexagonal binning drawn by geom_hex().
library(hexbin)
surveys_plot <- ggplot(
  surveys_complete,
  aes(x = weight, y = hindfoot_length)
)
# Hexagonal-bin version of the weight / hindfoot-length scatterplot.
surveys_plot + geom_hex()
Strengths:
Weaknesses:
Build plots iteratively: first define the dataset, lay out the axes and choose a geom; then modify the plot to extract more information from it, e.g. by adding transparency.
# Semi-transparent blue points (fixed colour, set outside aes()).
ggplot(surveys_complete, aes(x = weight, y = hindfoot_length)) +
  geom_point(color = "blue", alpha = 0.1)
# Same transparency, but colour is mapped to species_id inside aes().
ggplot(surveys_complete, aes(x = weight, y = hindfoot_length)) +
  geom_point(aes(color = species_id), alpha = 0.1)
CHALLENGE- ITERATIVE PLOTS
Scatterplot of weight over species_id, with the plot types shown in different colours
# Base plot: species on x, weight on y.
challenge_plot <- ggplot(surveys_complete, aes(x = species_id, y = weight))
# Colour each point by the plot type it was recorded in.
challenge_plot + geom_point(aes(color = plot_type))
BOX PLOT distribution of weight in each species
# One box per species summarising the distribution of weight.
ggplot(surveys_complete, aes(x = species_id, y = weight)) +
  geom_boxplot()
add points to this plot to see measurements and their distribution
# Fully transparent boxes, with the individual measurements jittered over
# them in a semi-transparent tomato colour.
ggplot(surveys_complete, aes(x = species_id, y = weight)) +
  geom_boxplot(alpha = 0) +
  geom_jitter(color = "tomato", alpha = 0.3)
CHALLENGE- BOX PLOTS
# Violin outlines instead of boxes, with the raw measurements jittered on
# top; the y axis uses a log10 scale.
ggplot(surveys_complete, aes(x = species_id, y = weight)) +
  geom_violin(alpha = 0) +
  geom_jitter(color = "tomato", alpha = 0.3) +
  scale_y_log10()
other variables—
# Hindfoot length per species. Layers are drawn in order: faint jittered
# points, points coloured by the (numeric) plot_id, then red boxplots on top.
ggplot(surveys_complete, aes(x = species_id, y = hindfoot_length)) +
  geom_jitter(alpha = 0.1) +
  geom_point(aes(color = plot_id)) +
  geom_boxplot(color = "red")
# Treat plot_id as a categorical variable so that each plot gets its own
# discrete colour. factor() takes the column directly; no c() wrapper is
# needed, which also keeps the legend title down to `factor(plot_id)`.
ggplot(surveys_complete, aes(x = species_id, y = hindfoot_length)) +
  geom_jitter(aes(color = factor(plot_id)), alpha = 0.1) +
  geom_boxplot(color = "gray")
Changing plot_id from a number to a factor makes the legend on the right-hand side a discrete key: each colour is shown next to its corresponding plot number.
##PART 2; PLOTTING TIME SERIES DATA
# Number of records per year and genus; count() stores the tally in column n.
yearly_counts <- count(surveys_complete, year, genus)
# No grouping is given here, so all genera are drawn as a single line.
ggplot(yearly_counts, aes(x = year, y = n)) +
  geom_line()
# Adding group = genus makes ggplot draw a separate line for each genus.
ggplot(yearly_counts, aes(x = year, y = n, group = genus)) +
  geom_line()
# Mapping color to genus gives each genus its own coloured line.
ggplot(yearly_counts, aes(x = year, y = n, color = genus)) +
  geom_line()
# The same plot, with the data frame piped into ggplot().
yearly_counts %>%
  ggplot(aes(x = year, y = n, color = genus)) +
  geom_line()
# The pipe can also link data manipulation directly to the visualisation
# that follows it: count, then plot, in one chain.
yearly_counts_graph <- surveys_complete %>%
  count(year, genus) %>%
  ggplot(aes(x = year, y = n, color = genus)) +
  geom_line()
# Printing the stored plot object renders it.
print(yearly_counts_graph)
# Faceting splits one plot into several panels based on a variable in the
# dataset -- here, one panel per genus.
ggplot(yearly_counts, aes(x = year, y = n)) +
  geom_line() +
  facet_wrap(vars(genus))
# Tally by sex as well, so that each genus panel can show one line per sex.
yearly_sex_counts <- count(surveys_complete, year, genus, sex)
ggplot(yearly_sex_counts, aes(x = year, y = n, color = sex)) +
  geom_line() +
  facet_wrap(vars(genus))
# facet_grid() arranges panels by two variables at once: sex defines the
# rows and genus defines the columns.
ggplot(yearly_sex_counts, aes(x = year, y = n, color = sex)) +
  geom_line() +
  facet_grid(rows = vars(sex), cols = vars(genus))
# Facet by rows only: the genus panels are stacked in a single column.
ggplot(yearly_sex_counts, aes(x = year, y = n, color = sex)) +
  geom_line() +
  facet_grid(rows = vars(genus))
##ggplot2 themes: plot components can be customized using theme(), and pre-loaded complete themes are also available. For example, we can change our previous graph to have a simpler white background using the theme_bw() function:
# Genus-by-sex time series redrawn with the built-in black-and-white theme.
ggplot(yearly_sex_counts, aes(x = year, y = n, color = sex)) +
  geom_line() +
  facet_wrap(vars(genus)) +
  theme_bw()
##theme_minimal() and theme_light() are popular, and theme_void() can be useful as a starting point to create a new hand-crafted theme.
## CHALLENGE
# Mean weight of each species in each year. `.groups = "drop"` returns an
# ungrouped tibble, so no leftover grouping by year carries into later steps
# and summarise() has no regrouping message to emit.
yearly_weight <- surveys_complete %>%
  group_by(year, species_id) %>%
  summarise(avg_weight = mean(weight), .groups = "drop")
# One panel per species showing how its average weight changes over the years.
ggplot(yearly_weight, aes(x = year, y = avg_weight)) +
  geom_line() +
  facet_wrap(vars(species_id)) +
  theme_bw()
##CUSTOMIZATION
# Add a title and readable axis labels with labs().
ggplot(yearly_sex_counts, aes(x = year, y = n, color = sex)) +
  geom_line() +
  facet_wrap(vars(genus)) +
  labs(
    title = "Observed genera through time",
    x = "Year of observation",
    y = "Number of individuals"
  ) +
  theme_bw()
# Same labelled plot, with all text enlarged to size 16 through theme().
ggplot(yearly_sex_counts, aes(x = year, y = n, color = sex)) +
  geom_line() +
  facet_wrap(vars(genus)) +
  labs(
    title = "Observed genera through time",
    x = "Year of observation",
    y = "Number of individuals"
  ) +
  theme_bw() +
  theme(text = element_text(size = 16))
##can fool around and italicize titles etc
# Finer theme tweaks: grey x tick labels rotated by 90 degrees, grey y tick
# labels, italic facet strip titles, and size-16 text overall.
ggplot(yearly_sex_counts, aes(x = year, y = n, color = sex)) +
  geom_line() +
  facet_wrap(vars(genus)) +
  labs(
    title = "Observed genera through time",
    x = "Year of observation",
    y = "Number of individuals"
  ) +
  theme_bw() +
  theme(
    axis.text.x = element_text(
      colour = "grey20", size = 12, angle = 90, hjust = 0.5, vjust = 0.5
    ),
    axis.text.y = element_text(colour = "grey20", size = 12),
    strip.text = element_text(face = "italic"),
    text = element_text(size = 16)
  )
##THEMES CAN BE SAVED
# A theme() specification is an ordinary object: store it once and add it to
# any plot with `+`.
grey_theme <- theme(
  axis.text.x = element_text(
    colour = "grey20", size = 12, angle = 90, hjust = 0.5, vjust = 0.5
  ),
  axis.text.y = element_text(colour = "grey20", size = 12),
  text = element_text(size = 16)
)
# Apply the stored theme to a boxplot of hindfoot length per species.
ggplot(surveys_complete, aes(x = species_id, y = hindfoot_length)) +
  geom_boxplot() +
  grey_theme
##CHALLENGE
# Challenge: restyled genus-by-sex time series with thick lines, a softer
# colour palette, and coloured axis tick labels.
# One line layer is enough: geom_line() already uses stat = "identity", so a
# second default geom_line() would only redraw the same data on top.
ggplot(yearly_sex_counts, aes(x = year, y = n, color = sex)) +
  geom_line(size = 2) +
  # Lower chroma and higher luminance give paler line colours.
  scale_color_hue(c = 35, l = 80) +
  facet_wrap(vars(genus)) +
  labs(
    title = "Observed genera through time",
    x = "Year of observation",
    y = "Number of individuals"
  ) +
  theme_bw() +
  theme(
    axis.text.x = element_text(
      colour = "Blue", size = 12, angle = 90, hjust = 0.5, vjust = 0.5
    ),
    axis.text.y = element_text(colour = "red", size = 12),
    strip.text = element_text(face = "italic"),
    text = element_text(size = 16),
    legend.box = "horizontal"
  )
##ARRANGING AND EXPORTING PLOTS–The gridExtra package allows us to combine separate ggplots into a single figure using grid.arrange():
library(gridExtra)
##
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
##
## combine
# Boxplot of weight per species on a log10 y axis; the y label is a plotmath
# expression so that the 10 is rendered as a subscript. The trailing empty
# labs() call of the earlier version added nothing and is omitted.
spp_weight_boxplot <- ggplot(
  surveys_complete,
  aes(x = species_id, y = weight)
) +
  geom_boxplot() +
  scale_y_log10() +
  labs(x = "Species", y = expression(log[10](Weight)))
# Yearly record counts, one coloured line per genus.
spp_count_plot <- ggplot(yearly_counts, aes(x = year, y = n, color = genus)) +
  geom_line() +
  labs(x = "Year", y = "Abundance")
# Draw both plots side by side in two columns with relative widths 4 : 6.
grid.arrange(spp_weight_boxplot, spp_count_plot, widths = c(4, 6), ncol = 2)
# Build the final faceted figure and keep it in a variable so that it can be
# written to disk.
my_plot <- ggplot(yearly_sex_counts, aes(x = year, y = n, color = sex)) +
  geom_line() +
  facet_wrap(vars(genus)) +
  labs(
    title = "Observed genera through time",
    x = "Year of observation",
    y = "Number of individuals"
  ) +
  theme_bw() +
  theme(
    axis.text.x = element_text(
      colour = "grey20", size = 12, angle = 90, hjust = 0.5, vjust = 0.5
    ),
    axis.text.y = element_text(colour = "grey20", size = 12),
    text = element_text(size = 16)
  )
# Save the figure as a PNG, 15 wide by 10 high (ggsave's default units).
ggsave(filename = "name_of_file.png", plot = my_plot, width = 15, height = 10)
# ggsave() also works on the combined figure returned by grid.arrange().
combo_plot <- grid.arrange(
  spp_weight_boxplot, spp_count_plot,
  ncol = 2, widths = c(4, 6)
)
# Both dimensions are given explicitly so the file is always 10 x 5; when
# `height` is left out, ggsave() falls back to the size of the current
# graphics device, which differs between sessions.
ggsave(
  "combo_plot_abun_weight.png", combo_plot,
  width = 10, height = 5, dpi = 300
)